; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+experimental-zihintntl,+f,+d,+zfh < %s | FileCheck %s -check-prefix=CHECK-RV64
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zihintntl,+f,+d,+zfh < %s | FileCheck %s -check-prefix=CHECK-RV32
; RUN: llc -mtriple=riscv64 -mattr=+experimental-zihintntl,+f,+d,+zfh,+c < %s | FileCheck %s -check-prefix=CHECK-RV64C
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zihintntl,+f,+d,+zfh,+c < %s | FileCheck %s -check-prefix=CHECK-RV32C
; RUN: llc -mtriple=riscv64 -mattr=+experimental-zihintntl,+f,+d,+zfh,+v < %s | FileCheck %s -check-prefix=CHECK-RV64V
; RUN: llc -mtriple=riscv32 -mattr=+experimental-zihintntl,+f,+d,+zfh,+v < %s | FileCheck %s -check-prefix=CHECK-RV32V

; Nontemporal load of an i64 through %p (the load carries !nontemporal !0;
; !0 is defined outside this excerpt).
; Expected codegen: every emitted load is immediately preceded by an ntl.all
; hint (c.ntl.all in the +c configurations). The 64-bit targets use a single
; ld; the 32-bit targets split the access into two lw instructions, each with
; its own hint.
define i64 @test_nontemporal_load_i64(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_i64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_i64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a2, 0(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a1, 4(a0)
; CHECK-RV32-NEXT:    mv a0, a2
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_i64:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_i64:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a2, 0(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a1, 4(a0)
; CHECK-RV32C-NEXT:    mv a0, a2
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_i64:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    ld a0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_i64:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    lw a2, 0(a0)
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    lw a1, 4(a0)
; CHECK-RV32V-NEXT:    mv a0, a2
; CHECK-RV32V-NEXT:    ret

  %1 = load i64, ptr %p, !nontemporal !0
  ret i64 %1
}

; Nontemporal load of an i32 through %p.
; Expected codegen in all six configurations: one lw, immediately preceded by
; an ntl.all hint (c.ntl.all when +c is enabled).
define i32 @test_nontemporal_load_i32(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_i32:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    lw a0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_i32:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_i32:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    lw a0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_i32:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_i32:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    lw a0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_i32:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    lw a0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  %1 = load i32, ptr %p, !nontemporal !0
  ret i32 %1
}

; Nontemporal load of an i16 through %p.
; Expected codegen in all six configurations: one lh, immediately preceded by
; an ntl.all hint (c.ntl.all when +c is enabled).
define i16 @test_nontemporal_load_i16(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_i16:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    lh a0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_i16:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lh a0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_i16:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    lh a0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_i16:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lh a0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_i16:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    lh a0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_i16:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    lh a0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  %1 = load i16, ptr %p, !nontemporal !0
  ret i16 %1
}

; Nontemporal load of an i8 through %p.
; Expected codegen in all six configurations: one lbu, immediately preceded by
; an ntl.all hint (c.ntl.all when +c is enabled).
define i8 @test_nontemporal_load_i8(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_i8:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    lbu a0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_i8:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lbu a0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_i8:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    lbu a0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_i8:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lbu a0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_i8:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    lbu a0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_i8:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    lbu a0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  %1 = load i8, ptr %p, !nontemporal !0
  ret i8 %1
}

; Two nontemporal half loads (from %p and from element index 3 of %p) whose
; results are added with fadd.
; Expected codegen in all six configurations: two flh instructions, at offsets
; 0 and 6, each immediately preceded by its own ntl.all hint (c.ntl.all when
; +c is enabled), followed by fadd.h.
define half @test_nontemporal_load_half(ptr %p) nounwind {
; CHECK-RV64-LABEL: test_nontemporal_load_half:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    flh fa5, 0(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    flh fa4, 6(a0)
; CHECK-RV64-NEXT:    fadd.h fa0, fa5, fa4
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_half:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    flh fa5, 0(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    flh fa4, 6(a0)
; CHECK-RV32-NEXT:    fadd.h fa0, fa5, fa4
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_half:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    flh fa5, 0(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    flh fa4, 6(a0)
; CHECK-RV64C-NEXT:    fadd.h fa0, fa5, fa4
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_half:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    flh fa5, 0(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    flh fa4, 6(a0)
; CHECK-RV32C-NEXT:    fadd.h fa0, fa5, fa4
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_half:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    flh fa5, 0(a0)
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    flh fa4, 6(a0)
; CHECK-RV64V-NEXT:    fadd.h fa0, fa5, fa4
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_half:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    flh fa5, 0(a0)
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    flh fa4, 6(a0)
; CHECK-RV32V-NEXT:    fadd.h fa0, fa5, fa4
; CHECK-RV32V-NEXT:    ret

  %1 = load half, ptr %p, !nontemporal !0
  ; Element index 3 of a half array; the assertions above expect this second
  ; load at byte offset 6.
  %2 = getelementptr half, ptr %p, i32 3
  %3 = load half, ptr %2, !nontemporal !0
  %4 = fadd half %1, %3
  ret half %4
}

; Nontemporal load of a float through %p.
; Expected codegen in all six configurations: one flw into fa0, immediately
; preceded by an ntl.all hint (c.ntl.all when +c is enabled).
define float @test_nontemporal_load_float(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_float:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    flw fa0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_float:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    flw fa0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_float:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    flw fa0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_float:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    flw fa0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_float:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    flw fa0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_float:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    flw fa0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  %1 = load float, ptr %p, !nontemporal !0
  ret float %1
}

; Nontemporal load of a double through %p.
; Expected codegen in all six configurations (32-bit included): one fld into
; fa0, immediately preceded by an ntl.all hint (c.ntl.all when +c is enabled).
define double @test_nontemporal_load_double(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_double:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    fld fa0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_double:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    fld fa0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_double:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    fld fa0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_double:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    fld fa0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_double:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    fld fa0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_double:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    fld fa0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  %1 = load double, ptr %p, !nontemporal !0
  ret double %1
}

; Nontemporal load of a <16 x i8> vector through %p.
; Expected codegen without +v: the 16 bytes are read through a1 with scalar
; loads (two ld on 64-bit, four lw on 32-bit), each preceded by its own
; ntl.all / c.ntl.all hint, and then written through a0 with plain stores that
; carry no hint. Presumably a0 is a hidden return-slot pointer; confirm
; against the calling convention.
; Expected codegen with +v: a single vle8.v, with the hint placed between the
; vsetivli and the vector load.
define <16 x i8> @test_nontemporal_load_v16i8(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a2, 8(a1)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a1, 0(a1)
; CHECK-RV64-NEXT:    sd a2, 8(a0)
; CHECK-RV64-NEXT:    sd a1, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a2, 12(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a3, 8(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a4, 4(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a1, 0(a1)
; CHECK-RV32-NEXT:    sw a2, 12(a0)
; CHECK-RV32-NEXT:    sw a3, 8(a0)
; CHECK-RV32-NEXT:    sw a4, 4(a0)
; CHECK-RV32-NEXT:    sw a1, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a2, 8(a1)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a1, 0(a1)
; CHECK-RV64C-NEXT:    sd a2, 8(a0)
; CHECK-RV64C-NEXT:    sd a1, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a2, 12(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a3, 8(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a4, 4(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a1, 0(a1)
; CHECK-RV32C-NEXT:    sw a2, 12(a0)
; CHECK-RV32C-NEXT:    sw a3, 8(a0)
; CHECK-RV32C-NEXT:    sw a4, 4(a0)
; CHECK-RV32C-NEXT:    sw a1, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    vle8.v v8, (a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    vle8.v v8, (a0)
; CHECK-RV32V-NEXT:    ret

  %1 = load <16 x i8>, ptr %p, !nontemporal !0
  ret <16 x i8> %1
}

; Nontemporal load of an <8 x i16> vector through %p.
; Expected codegen without +v: the 16 bytes are read through a1 with scalar
; loads (two ld on 64-bit, four lw on 32-bit), each preceded by its own
; ntl.all / c.ntl.all hint, and then written through a0 with plain stores that
; carry no hint. Presumably a0 is a hidden return-slot pointer; confirm
; against the calling convention.
; Expected codegen with +v: a single vle16.v, with the hint placed between the
; vsetivli and the vector load.
define <8 x i16> @test_nontemporal_load_v8i16(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a2, 8(a1)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a1, 0(a1)
; CHECK-RV64-NEXT:    sd a2, 8(a0)
; CHECK-RV64-NEXT:    sd a1, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a2, 12(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a3, 8(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a4, 4(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a1, 0(a1)
; CHECK-RV32-NEXT:    sw a2, 12(a0)
; CHECK-RV32-NEXT:    sw a3, 8(a0)
; CHECK-RV32-NEXT:    sw a4, 4(a0)
; CHECK-RV32-NEXT:    sw a1, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a2, 8(a1)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a1, 0(a1)
; CHECK-RV64C-NEXT:    sd a2, 8(a0)
; CHECK-RV64C-NEXT:    sd a1, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a2, 12(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a3, 8(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a4, 4(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a1, 0(a1)
; CHECK-RV32C-NEXT:    sw a2, 12(a0)
; CHECK-RV32C-NEXT:    sw a3, 8(a0)
; CHECK-RV32C-NEXT:    sw a4, 4(a0)
; CHECK-RV32C-NEXT:    sw a1, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    vle16.v v8, (a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    vle16.v v8, (a0)
; CHECK-RV32V-NEXT:    ret

  %1 = load <8 x i16>, ptr %p, !nontemporal !0
  ret <8 x i16> %1
}

; Nontemporal load of a <4 x i32> vector through %p.
; Expected codegen without +v: the 16 bytes are read through a1 with scalar
; loads (two ld on 64-bit, four lw on 32-bit), each preceded by its own
; ntl.all / c.ntl.all hint, and then written through a0 with plain stores that
; carry no hint. Presumably a0 is a hidden return-slot pointer; confirm
; against the calling convention.
; Expected codegen with +v: a single vle32.v, with the hint placed between the
; vsetivli and the vector load.
define <4 x i32> @test_nontemporal_load_v4i32(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a2, 8(a1)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a1, 0(a1)
; CHECK-RV64-NEXT:    sd a2, 8(a0)
; CHECK-RV64-NEXT:    sd a1, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a2, 12(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a3, 8(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a4, 4(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a1, 0(a1)
; CHECK-RV32-NEXT:    sw a2, 12(a0)
; CHECK-RV32-NEXT:    sw a3, 8(a0)
; CHECK-RV32-NEXT:    sw a4, 4(a0)
; CHECK-RV32-NEXT:    sw a1, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a2, 8(a1)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a1, 0(a1)
; CHECK-RV64C-NEXT:    sd a2, 8(a0)
; CHECK-RV64C-NEXT:    sd a1, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a2, 12(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a3, 8(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a4, 4(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a1, 0(a1)
; CHECK-RV32C-NEXT:    sw a2, 12(a0)
; CHECK-RV32C-NEXT:    sw a3, 8(a0)
; CHECK-RV32C-NEXT:    sw a4, 4(a0)
; CHECK-RV32C-NEXT:    sw a1, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    vle32.v v8, (a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    vle32.v v8, (a0)
; CHECK-RV32V-NEXT:    ret

  %1 = load <4 x i32>, ptr %p, !nontemporal !0
  ret <4 x i32> %1
}

; Nontemporal load of a <2 x i64> vector through %p.
; Expected codegen on 64-bit without +v: two hinted ld instructions read
; through a0 and leave the two elements in a0 and a1 (unlike the other 128-bit
; vector load tests in this file, no stores are emitted).
; Expected codegen on 32-bit without +v: four hinted lw instructions read
; through a1, followed by four plain sw instructions through a0 that carry no
; hint. Presumably a0 is a hidden return-slot pointer there; confirm against
; the calling convention.
; Expected codegen with +v: a single vle64.v, with the hint placed between the
; vsetivli and the vector load.
define <2 x i64> @test_nontemporal_load_v2i64(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a2, 0(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a1, 8(a0)
; CHECK-RV64-NEXT:    mv a0, a2
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a2, 12(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a3, 8(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a4, 4(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a1, 0(a1)
; CHECK-RV32-NEXT:    sw a2, 12(a0)
; CHECK-RV32-NEXT:    sw a3, 8(a0)
; CHECK-RV32-NEXT:    sw a4, 4(a0)
; CHECK-RV32-NEXT:    sw a1, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a2, 0(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a1, 8(a0)
; CHECK-RV64C-NEXT:    mv a0, a2
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a2, 12(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a3, 8(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a4, 4(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a1, 0(a1)
; CHECK-RV32C-NEXT:    sw a2, 12(a0)
; CHECK-RV32C-NEXT:    sw a3, 8(a0)
; CHECK-RV32C-NEXT:    sw a4, 4(a0)
; CHECK-RV32C-NEXT:    sw a1, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    vle64.v v8, (a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    vle64.v v8, (a0)
; CHECK-RV32V-NEXT:    ret

  %1 = load <2 x i64>, ptr %p, !nontemporal !0
  ret <2 x i64> %1
}

; Nontemporal store of the i64 %v through %p (the store carries
; !nontemporal !0; !0 is defined outside this excerpt).
; Expected codegen: every emitted store is immediately preceded by an ntl.all
; hint (c.ntl.all in the +c configurations). The 64-bit targets use a single
; sd; the 32-bit targets split the access into two sw instructions (offset 4
; first, then offset 0), each with its own hint.
define void @test_nontemporal_store_i64(ptr %p, i64 %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_i64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sd a1, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_i64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sw a2, 4(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sw a1, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_i64:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sd a1, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_i64:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sw a2, 4(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sw a1, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_i64:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    sd a1, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_store_i64:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    sw a2, 4(a0)
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    sw a1, 0(a0)
; CHECK-RV32V-NEXT:    ret

  store i64 %v, ptr %p, !nontemporal !0
  ret void
}

; Nontemporal store of the i32 %v through %p.
; Expected codegen in all six configurations: one sw, immediately preceded by
; an ntl.all hint (c.ntl.all when +c is enabled).
define void @test_nontemporal_store_i32(ptr %p, i32 %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_i32:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sw a1, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_i32:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sw a1, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_i32:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sw a1, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_i32:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sw a1, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_i32:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    sw a1, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_store_i32:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    sw a1, 0(a0)
; CHECK-RV32V-NEXT:    ret

  store i32 %v, ptr %p, !nontemporal !0
  ret void
}

; Nontemporal store of the i16 %v through %p.
; Expected codegen in all six configurations: one sh, immediately preceded by
; an ntl.all hint (c.ntl.all when +c is enabled).
define void @test_nontemporal_store_i16(ptr %p, i16 %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_i16:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sh a1, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_i16:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sh a1, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_i16:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sh a1, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_i16:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sh a1, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_i16:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    sh a1, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_store_i16:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    sh a1, 0(a0)
; CHECK-RV32V-NEXT:    ret

  store i16 %v, ptr %p, !nontemporal !0
  ret void
}

; Nontemporal store of the i8 %v through %p.
; Expected codegen in all six configurations: one sb, immediately preceded by
; an ntl.all hint (c.ntl.all when +c is enabled).
define void @test_nontemporal_store_i8(ptr %p, i8 %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_i8:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb a1, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_i8:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb a1, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_i8:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb a1, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_i8:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb a1, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_i8:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    sb a1, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_store_i8:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    sb a1, 0(a0)
; CHECK-RV32V-NEXT:    ret

  store i8 %v, ptr %p, !nontemporal !0
  ret void
}

; Nontemporal store of the half %v through %p.
; Expected codegen in all six configurations: one fsh from fa0, immediately
; preceded by an ntl.all hint (c.ntl.all when +c is enabled).
define void @test_nontemporal_store_half(ptr %p, half %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_half:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    fsh fa0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_half:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    fsh fa0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_half:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    fsh fa0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_half:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    fsh fa0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_half:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    fsh fa0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_store_half:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    fsh fa0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  store half %v, ptr %p, !nontemporal !0
  ret void
}

; Nontemporal store of the float %v through %p.
; Expected codegen in all six configurations: one fsw from fa0, immediately
; preceded by an ntl.all hint (c.ntl.all when +c is enabled).
define void @test_nontemporal_store_float(ptr %p, float %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_float:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    fsw fa0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_float:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    fsw fa0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_float:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    fsw fa0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_float:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    fsw fa0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_float:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    fsw fa0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_store_float:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    fsw fa0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  store float %v, ptr %p, !nontemporal !0
  ret void
}

; Nontemporal store of the double %v through %p.
; Expected codegen in all six configurations (32-bit included): one fsd from
; fa0, immediately preceded by an ntl.all hint (c.ntl.all when +c is enabled).
define void @test_nontemporal_store_double(ptr %p, double %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_double:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    fsd fa0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_double:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    fsd fa0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_double:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    fsd fa0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_double:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    fsd fa0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_double:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    fsd fa0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_store_double:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    fsd fa0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  store double %v, ptr %p, !nontemporal !0
  ret void
}

define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_v16i8:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    addi sp, sp, -16
; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 16
; CHECK-RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; CHECK-RV64-NEXT:    sd s1, 0(sp) # 8-byte Folded Spill
; CHECK-RV64-NEXT:    .cfi_offset s0, -8
; CHECK-RV64-NEXT:    .cfi_offset s1, -16
; CHECK-RV64-NEXT:    lbu a2, 0(a1)
; CHECK-RV64-NEXT:    lbu a3, 8(a1)
; CHECK-RV64-NEXT:    lbu a4, 16(a1)
; CHECK-RV64-NEXT:    lbu a5, 24(a1)
; CHECK-RV64-NEXT:    lbu a6, 32(a1)
; CHECK-RV64-NEXT:    lbu a7, 40(a1)
; CHECK-RV64-NEXT:    lbu t0, 48(a1)
; CHECK-RV64-NEXT:    lbu t1, 56(a1)
; CHECK-RV64-NEXT:    lbu t2, 64(a1)
; CHECK-RV64-NEXT:    lbu t3, 72(a1)
; CHECK-RV64-NEXT:    lbu t4, 80(a1)
; CHECK-RV64-NEXT:    lbu t5, 88(a1)
; CHECK-RV64-NEXT:    lbu t6, 120(a1)
; CHECK-RV64-NEXT:    lbu s0, 112(a1)
; CHECK-RV64-NEXT:    lbu s1, 104(a1)
; CHECK-RV64-NEXT:    lbu a1, 96(a1)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb t6, 15(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb s0, 14(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb s1, 13(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb a1, 12(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb t5, 11(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb t4, 10(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb t3, 9(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb t2, 8(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb t1, 7(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb t0, 6(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb a7, 5(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb a6, 4(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb a5, 3(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb a4, 2(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb a3, 1(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sb a2, 0(a0)
; CHECK-RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; CHECK-RV64-NEXT:    ld s1, 0(sp) # 8-byte Folded Reload
; CHECK-RV64-NEXT:    addi sp, sp, 16
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_v16i8:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    addi sp, sp, -16
; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
; CHECK-RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
; CHECK-RV32-NEXT:    .cfi_offset s0, -4
; CHECK-RV32-NEXT:    .cfi_offset s1, -8
; CHECK-RV32-NEXT:    lbu a2, 0(a1)
; CHECK-RV32-NEXT:    lbu a3, 4(a1)
; CHECK-RV32-NEXT:    lbu a4, 8(a1)
; CHECK-RV32-NEXT:    lbu a5, 12(a1)
; CHECK-RV32-NEXT:    lbu a6, 16(a1)
; CHECK-RV32-NEXT:    lbu a7, 20(a1)
; CHECK-RV32-NEXT:    lbu t0, 24(a1)
; CHECK-RV32-NEXT:    lbu t1, 28(a1)
; CHECK-RV32-NEXT:    lbu t2, 32(a1)
; CHECK-RV32-NEXT:    lbu t3, 36(a1)
; CHECK-RV32-NEXT:    lbu t4, 40(a1)
; CHECK-RV32-NEXT:    lbu t5, 44(a1)
; CHECK-RV32-NEXT:    lbu t6, 60(a1)
; CHECK-RV32-NEXT:    lbu s0, 56(a1)
; CHECK-RV32-NEXT:    lbu s1, 52(a1)
; CHECK-RV32-NEXT:    lbu a1, 48(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb t6, 15(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb s0, 14(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb s1, 13(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb a1, 12(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb t5, 11(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb t4, 10(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb t3, 9(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb t2, 8(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb t1, 7(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb t0, 6(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb a7, 5(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb a6, 4(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb a5, 3(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb a4, 2(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb a3, 1(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sb a2, 0(a0)
; CHECK-RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-RV32-NEXT:    addi sp, sp, 16
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_v16i8:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    addi sp, sp, -16
; CHECK-RV64C-NEXT:    .cfi_def_cfa_offset 16
; CHECK-RV64C-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; CHECK-RV64C-NEXT:    sd s1, 0(sp) # 8-byte Folded Spill
; CHECK-RV64C-NEXT:    .cfi_offset s0, -8
; CHECK-RV64C-NEXT:    .cfi_offset s1, -16
; CHECK-RV64C-NEXT:    lbu a6, 0(a1)
; CHECK-RV64C-NEXT:    lbu a7, 8(a1)
; CHECK-RV64C-NEXT:    lbu t0, 16(a1)
; CHECK-RV64C-NEXT:    lbu t1, 24(a1)
; CHECK-RV64C-NEXT:    lbu t2, 32(a1)
; CHECK-RV64C-NEXT:    lbu t3, 40(a1)
; CHECK-RV64C-NEXT:    lbu t4, 48(a1)
; CHECK-RV64C-NEXT:    lbu t5, 56(a1)
; CHECK-RV64C-NEXT:    lbu t6, 64(a1)
; CHECK-RV64C-NEXT:    lbu a3, 72(a1)
; CHECK-RV64C-NEXT:    lbu a4, 80(a1)
; CHECK-RV64C-NEXT:    lbu a5, 88(a1)
; CHECK-RV64C-NEXT:    lbu a2, 120(a1)
; CHECK-RV64C-NEXT:    lbu s0, 112(a1)
; CHECK-RV64C-NEXT:    lbu s1, 104(a1)
; CHECK-RV64C-NEXT:    lbu a1, 96(a1)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb a2, 15(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb s0, 14(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb s1, 13(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb a1, 12(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb a5, 11(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb a4, 10(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb a3, 9(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb t6, 8(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb t5, 7(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb t4, 6(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb t3, 5(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb t2, 4(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb t1, 3(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb t0, 2(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb a7, 1(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sb a6, 0(a0)
; CHECK-RV64C-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; CHECK-RV64C-NEXT:    ld s1, 0(sp) # 8-byte Folded Reload
; CHECK-RV64C-NEXT:    addi sp, sp, 16
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_v16i8:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    addi sp, sp, -16
; CHECK-RV32C-NEXT:    .cfi_def_cfa_offset 16
; CHECK-RV32C-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; CHECK-RV32C-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
; CHECK-RV32C-NEXT:    .cfi_offset s0, -4
; CHECK-RV32C-NEXT:    .cfi_offset s1, -8
; CHECK-RV32C-NEXT:    lbu a6, 0(a1)
; CHECK-RV32C-NEXT:    lbu a7, 4(a1)
; CHECK-RV32C-NEXT:    lbu t0, 8(a1)
; CHECK-RV32C-NEXT:    lbu t1, 12(a1)
; CHECK-RV32C-NEXT:    lbu t2, 16(a1)
; CHECK-RV32C-NEXT:    lbu t3, 20(a1)
; CHECK-RV32C-NEXT:    lbu t4, 24(a1)
; CHECK-RV32C-NEXT:    lbu t5, 28(a1)
; CHECK-RV32C-NEXT:    lbu t6, 32(a1)
; CHECK-RV32C-NEXT:    lbu a3, 36(a1)
; CHECK-RV32C-NEXT:    lbu a4, 40(a1)
; CHECK-RV32C-NEXT:    lbu a5, 44(a1)
; CHECK-RV32C-NEXT:    lbu a2, 60(a1)
; CHECK-RV32C-NEXT:    lbu s0, 56(a1)
; CHECK-RV32C-NEXT:    lbu s1, 52(a1)
; CHECK-RV32C-NEXT:    lbu a1, 48(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb a2, 15(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb s0, 14(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb s1, 13(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb a1, 12(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb a5, 11(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb a4, 10(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb a3, 9(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb t6, 8(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb t5, 7(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb t4, 6(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb t3, 5(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb t2, 4(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb t1, 3(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb t0, 2(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb a7, 1(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sb a6, 0(a0)
; CHECK-RV32C-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; CHECK-RV32C-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
; CHECK-RV32C-NEXT:    addi sp, sp, 16
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_v16i8:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    vse8.v v8, (a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_store_v16i8:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    vse8.v v8, (a0)
; CHECK-RV32V-NEXT:    ret
  store <16 x i8> %v, ptr %p, !nontemporal !0
  ret void
}

; Nontemporal store of a <8 x i16> vector to %p.
; In the configurations without +v the expected code reads the eight elements
; with lh through a1 and writes them back with eight sh instructions, each one
; immediately preceded by an NTL hint (ntl.all, or c.ntl.all when +c is
; enabled). In the +v configurations the expected code is a single vse16.v
; preceded by one ntl.all.
define void @test_nontemporal_store_v8i16(ptr %p, <8 x i16> %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    lh a2, 0(a1)
; CHECK-RV64-NEXT:    lh a3, 8(a1)
; CHECK-RV64-NEXT:    lh a4, 16(a1)
; CHECK-RV64-NEXT:    lh a5, 24(a1)
; CHECK-RV64-NEXT:    lh a6, 56(a1)
; CHECK-RV64-NEXT:    lh a7, 48(a1)
; CHECK-RV64-NEXT:    lh t0, 40(a1)
; CHECK-RV64-NEXT:    lh a1, 32(a1)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sh a6, 14(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sh a7, 12(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sh t0, 10(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sh a1, 8(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sh a5, 6(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sh a4, 4(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sh a3, 2(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sh a2, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    lh a2, 0(a1)
; CHECK-RV32-NEXT:    lh a3, 4(a1)
; CHECK-RV32-NEXT:    lh a4, 8(a1)
; CHECK-RV32-NEXT:    lh a5, 12(a1)
; CHECK-RV32-NEXT:    lh a6, 28(a1)
; CHECK-RV32-NEXT:    lh a7, 24(a1)
; CHECK-RV32-NEXT:    lh t0, 20(a1)
; CHECK-RV32-NEXT:    lh a1, 16(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sh a6, 14(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sh a7, 12(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sh t0, 10(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sh a1, 8(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sh a5, 6(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sh a4, 4(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sh a3, 2(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sh a2, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    lh a6, 0(a1)
; CHECK-RV64C-NEXT:    lh a7, 8(a1)
; CHECK-RV64C-NEXT:    lh t0, 16(a1)
; CHECK-RV64C-NEXT:    lh a5, 24(a1)
; CHECK-RV64C-NEXT:    lh a2, 56(a1)
; CHECK-RV64C-NEXT:    lh a3, 48(a1)
; CHECK-RV64C-NEXT:    lh a4, 40(a1)
; CHECK-RV64C-NEXT:    lh a1, 32(a1)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sh a2, 14(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sh a3, 12(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sh a4, 10(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sh a1, 8(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sh a5, 6(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sh t0, 4(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sh a7, 2(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sh a6, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    lh a6, 0(a1)
; CHECK-RV32C-NEXT:    lh a7, 4(a1)
; CHECK-RV32C-NEXT:    lh t0, 8(a1)
; CHECK-RV32C-NEXT:    lh a5, 12(a1)
; CHECK-RV32C-NEXT:    lh a2, 28(a1)
; CHECK-RV32C-NEXT:    lh a3, 24(a1)
; CHECK-RV32C-NEXT:    lh a4, 20(a1)
; CHECK-RV32C-NEXT:    lh a1, 16(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sh a2, 14(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sh a3, 12(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sh a4, 10(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sh a1, 8(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sh a5, 6(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sh t0, 4(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sh a7, 2(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sh a6, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    vse16.v v8, (a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_store_v8i16:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    vse16.v v8, (a0)
; CHECK-RV32V-NEXT:    ret
  store <8 x i16> %v, ptr %p, !nontemporal !0
  ret void
}

; Nontemporal store of a <4 x i32> vector to %p.
; In the configurations without +v the expected code reads the four elements
; with lw through a1 (8-byte stride on riscv64, 4-byte stride on riscv32) and
; writes them back with four sw instructions, each one immediately preceded by
; an NTL hint (ntl.all, or c.ntl.all when +c is enabled). In the +v
; configurations the expected code is a single vse32.v preceded by one ntl.all.
define void @test_nontemporal_store_v4i32(ptr %p, <4 x i32> %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_v4i32:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    lw a2, 24(a1)
; CHECK-RV64-NEXT:    lw a3, 16(a1)
; CHECK-RV64-NEXT:    lw a4, 8(a1)
; CHECK-RV64-NEXT:    lw a1, 0(a1)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sw a2, 12(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sw a3, 8(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sw a4, 4(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sw a1, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_v4i32:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    lw a2, 12(a1)
; CHECK-RV32-NEXT:    lw a3, 8(a1)
; CHECK-RV32-NEXT:    lw a4, 4(a1)
; CHECK-RV32-NEXT:    lw a1, 0(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sw a2, 12(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sw a3, 8(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sw a4, 4(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sw a1, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_v4i32:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    lw a2, 24(a1)
; CHECK-RV64C-NEXT:    lw a3, 16(a1)
; CHECK-RV64C-NEXT:    lw a4, 8(a1)
; CHECK-RV64C-NEXT:    lw a1, 0(a1)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sw a2, 12(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sw a3, 8(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sw a4, 4(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sw a1, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_v4i32:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    lw a2, 12(a1)
; CHECK-RV32C-NEXT:    lw a3, 8(a1)
; CHECK-RV32C-NEXT:    lw a4, 4(a1)
; CHECK-RV32C-NEXT:    lw a1, 0(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sw a2, 12(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sw a3, 8(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sw a4, 4(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sw a1, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_v4i32:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    vse32.v v8, (a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_store_v4i32:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    vse32.v v8, (a0)
; CHECK-RV32V-NEXT:    ret
  store <4 x i32> %v, ptr %p, !nontemporal !0
  ret void
}

; Nontemporal store of a <2 x i64> vector to %p.
; Expected code per configuration:
;   - riscv64 without +v: the two elements are stored straight from a1/a2 with
;     two sd instructions, each preceded by an NTL hint.
;   - riscv32 without +v: four words are read with lw through a1 and written
;     back with four sw instructions, each preceded by an NTL hint.
;   - +c variants: same sequences, with the hint emitted as c.ntl.all.
;   - +v variants: a single vse64.v preceded by one ntl.all.
define void @test_nontemporal_store_v2i64(ptr %p, <2 x i64> %v) {
; CHECK-RV64-LABEL: test_nontemporal_store_v2i64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sd a2, 8(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    sd a1, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_store_v2i64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    lw a2, 12(a1)
; CHECK-RV32-NEXT:    lw a3, 8(a1)
; CHECK-RV32-NEXT:    lw a4, 4(a1)
; CHECK-RV32-NEXT:    lw a1, 0(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sw a2, 12(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sw a3, 8(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sw a4, 4(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    sw a1, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_store_v2i64:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sd a2, 8(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    sd a1, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_store_v2i64:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    lw a2, 12(a1)
; CHECK-RV32C-NEXT:    lw a3, 8(a1)
; CHECK-RV32C-NEXT:    lw a4, 4(a1)
; CHECK-RV32C-NEXT:    lw a1, 0(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sw a2, 12(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sw a3, 8(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sw a4, 4(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    sw a1, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_store_v2i64:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    vse64.v v8, (a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_store_v2i64:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    vse64.v v8, (a0)
; CHECK-RV32V-NEXT:    ret
  store <2 x i64> %v, ptr %p, !nontemporal !0
  ret void
}

; Metadata node referenced by the `!nontemporal !0` attachments on the stores
; above; it holds a single i32 operand with value 1.
!0 = !{i32 1}
